From 5bd5bfb978df5f345518d2444d8f2ddba849d8d6 Mon Sep 17 00:00:00 2001
From: Hans Dembinski <HDembinski@users.noreply.github.com>
Date: Mon, 7 Dec 2020 15:12:20 +0100
Subject: [PATCH] Bug-fix for one-sided cropping and clarify how cropping works
 (#302)

---
 boost/histogram/algorithm/reduce.hpp | 28 +++++++--
 libs/histogram/test/algorithm_reduce_test.cpp               | 63 ++++++++++++++++----
 2 files changed, 77 insertions(+), 14 deletions(-)

diff --git a/include/boost/histogram/algorithm/reduce.hpp b/include/boost/histogram/algorithm/reduce.hpp
index a5d2fffa6..0646db34e 100644
--- a/boost/histogram/algorithm/reduce.hpp
+++ b/boost/histogram/algorithm/reduce.hpp
@@ -111,7 +111,14 @@ inline reduce_command crop(unsigned iaxis, double lower, double upper) {
   Command is applied to corresponding axis in order of reduce arguments.
 
   Works like `shrink` (see shrink documentation for details), but counts in removed bins
-  are discarded, whether underflow and overflow bins are present or not.
+  are discarded, whether underflow and overflow bins are present or not. If the cropped
+  range goes beyond the axis range, then the content of the underflow
+  or overflow bin which overlaps with the range is kept.
+
+  If the counts in an existing underflow or overflow bin are discared by the crop, the
+  corresponding memory cells are not physically removed. Only their contents are set to
+  zero. This technical limitation may be lifted in the future, then crop may completely
+  remove the cropped memory cells.
 
   @param lower bin which contains lower is first to be kept.
   @param upper bin which contains upper is last to be kept, except if upper is equal to
@@ -323,6 +330,8 @@ inline reduce_command slice_and_rebin(axis::index_type begin, axis::index_type e
   exception. Histograms with  non-reducible axes can still be reduced along the
   other axes that are reducible.
 
+  An overload allows one to pass reduce_command as positional arguments.
+
   @param hist original histogram.
   @param options iterable sequence of reduce commands: `shrink`, `slice`, `rebin`,
   `shrink_and_rebin`, or `slice_and_rebin`. The element type of the iterable should be
@@ -343,14 +352,16 @@ Histogram reduce(const Histogram& hist, const Iterable& options) {
         auto& o = opts[iaxis];
         o.is_ordered = axis::traits::ordered(a_in);
         if (o.merge > 0) { // option is set?
-          o.use_underflow_bin = !o.crop && AO::test(axis::option::underflow);
-          o.use_overflow_bin = !o.crop && AO::test(axis::option::overflow);
+          o.use_underflow_bin = AO::test(axis::option::underflow);
+          o.use_overflow_bin = AO::test(axis::option::overflow);
           return detail::static_if_c<axis::traits::is_reducible<A>::value>(
               [&o](const auto& a_in) {
                 if (o.range == reduce_command::range_t::none) {
+                  // no range restriction, pure rebin
                   o.begin.index = 0;
                   o.end.index = a_in.size();
                 } else {
+                  // range striction, convert values to indices as needed
                   if (o.range == reduce_command::range_t::values) {
                     const auto end_value = o.end.value;
                     o.begin.index = axis::traits::index(a_in, o.begin.value);
@@ -359,7 +370,14 @@ Histogram reduce(const Histogram& hist, const Iterable& options) {
                     if (axis::traits::value_as<double>(a_in, o.end.index) != end_value)
                       ++o.end.index;
                   }
-                  // limit [begin, end] to [0, size()]
+
+                  // crop flow bins if index range does not include them
+                  if (o.crop) {
+                    o.use_underflow_bin &= o.begin.index < 0;
+                    o.use_overflow_bin &= o.end.index > a_in.size();
+                  }
+
+                  // now limit [begin, end] to [0, size()]
                   if (o.begin.index < 0) o.begin.index = 0;
                   if (o.end.index > a_in.size()) o.end.index = a_in.size();
                 }
@@ -435,6 +453,8 @@ Histogram reduce(const Histogram& hist, const Iterable& options) {
   the other axes. Trying to reducing a non-reducible axis triggers an invalid_argument
   exception.
 
+  An overload allows one to pass an iterable of reduce_command.
+
   @param hist original histogram.
   @param opt first reduce command; one of `shrink`, `slice`, `rebin`,
   `shrink_and_rebin`, or `slice_or_rebin`.
diff --git a/test/algorithm_reduce_test.cpp b/test/algorithm_reduce_test.cpp
index 6ea42fadf..c6b4df4a4 100644
--- a/libs/histogram/test/algorithm_reduce_test.cpp
+++ b/libs/histogram/test/algorithm_reduce_test.cpp
@@ -105,15 +105,16 @@ void run_tests() {
     BOOST_TEST_EQ(reduce(h, crop(0, 1.999)).axis(), ID(0, 2));
   }
 
+  // shrink and rebin
   {
     auto h = make_s(Tag(), std::vector<int>(), R(4, 1, 5, "1"), R(3, -1, 2, "2"));
 
     /*
       matrix layout:
-      x
+      x ->
     y 1 0 1 0
-      1 1 0 0
-      0 2 1 3
+    | 1 1 0 0
+    v 0 2 1 3
     */
     h.at(0, 0) = 1;
     h.at(0, 1) = 1;
@@ -122,11 +123,13 @@ void run_tests() {
     h.at(2, 0) = 1;
     h.at(2, 2) = 1;
     h.at(3, 2) = 3;
+    h.at(-1, -1) = 1; // underflow
+    h.at(4, 3) = 1; // overflow
 
     // should do nothing, index order does not matter
     auto hr = reduce(h, shrink(1, -1, 2), rebin(0, 1));
     BOOST_TEST_EQ(hr.rank(), 2);
-    BOOST_TEST_EQ(sum(hr), 10);
+    BOOST_TEST_EQ(sum(hr), 12);
     BOOST_TEST_EQ(hr.axis(0), R(4, 1, 5, "1"));
     BOOST_TEST_EQ(hr.axis(1), R(3, -1, 2, "2"));
     BOOST_TEST_EQ(hr, h);
@@ -138,7 +141,7 @@ void run_tests() {
     // shrinking along first axis
     hr = reduce(h, shrink(0, 2, 4));
     BOOST_TEST_EQ(hr.rank(), 2);
-    BOOST_TEST_EQ(sum(hr), 10);
+    BOOST_TEST_EQ(sum(hr), 12);
     BOOST_TEST_EQ(hr.axis(0), R(2, 2, 4, "1"));
     BOOST_TEST_EQ(hr.axis(1), R(3, -1, 2, "2"));
     BOOST_TEST_EQ(hr.at(-1, 0), 1); // underflow
@@ -164,7 +167,7 @@ void run_tests() {
 
     hr = reduce(h, shrink_and_rebin(0, 2, 5, 2), rebin(1, 3));
     BOOST_TEST_EQ(hr.rank(), 2);
-    BOOST_TEST_EQ(sum(hr), 10);
+    BOOST_TEST_EQ(sum(hr), 12);
     BOOST_TEST_EQ(hr.axis(0), R(1, 2, 4, "1"));
     BOOST_TEST_EQ(hr.axis(1), R(1, -1, 2, "2"));
     BOOST_TEST_EQ(hr.at(-1, 0), 2); // underflow
@@ -184,16 +187,16 @@ void run_tests() {
     BOOST_TEST_EQ(hr4, hr);
   }
 
-  // crop
+  // crop and rebin
   {
     auto h = make_s(Tag(), std::vector<int>(), R(4, 1, 5), R(3, 1, 4));
 
     /*
       matrix layout:
-      x
+      x ->
     y 1 0 1 0
-      1 1 0 0
-      0 2 1 3
+    | 1 1 0 0
+    v 0 2 1 3
     */
     h.at(0, 0) = 1;
     h.at(0, 1) = 1;
@@ -202,6 +205,9 @@ void run_tests() {
     h.at(2, 0) = 1;
     h.at(2, 2) = 1;
     h.at(3, 2) = 3;
+    h.at(-1, -1) = 1; // underflow
+    h.at(4, 3) = 1; // overflow
+
 
     /*
       crop first and last column in x and y
@@ -231,6 +237,43 @@ void run_tests() {
     BOOST_TEST_EQ(hr, hr4);
   }
 
+  // one-sided crop
+  {
+    auto h = make_s(Tag(), std::vector<int>(), ID(1, 4));
+    std::fill(h.begin(), h.end(), 1);
+    // underflow: 1
+    // index 0, x 1: 1
+    // index 1, x 2: 1
+    // index 2, x 3: 1
+    // overflow: 1
+    BOOST_TEST_EQ(sum(h), 5);
+    BOOST_TEST_EQ(h.size(), 5);
+
+    // keep underflow
+    auto hr1 = reduce(h, crop(0, 3));
+    BOOST_TEST_EQ(hr1, reduce(h, slice(-1, 2, slice_mode::crop)));
+    BOOST_TEST_EQ(sum(hr1), 3);
+    BOOST_TEST_EQ(hr1.size(), 4); // flow bins are not physically removed, only zeroed
+
+    // remove underflow
+    auto hr2 = reduce(h, crop(1, 3));
+    BOOST_TEST_EQ(hr2, reduce(h, slice(0, 2, slice_mode::crop)));
+    BOOST_TEST_EQ(sum(hr2), 2);
+    BOOST_TEST_EQ(hr2.size(), 4); // flow bins are not physically removed, only zeroed
+
+    // keep overflow
+    auto hr3 = reduce(h, crop(2, 5));
+    BOOST_TEST_EQ(hr3, reduce(h, slice(1, 4, slice_mode::crop)));
+    BOOST_TEST_EQ(sum(hr3), 3);
+    BOOST_TEST_EQ(hr3.size(), 4); // flow bins are not physically removed, only zeroed
+
+    // remove overflow
+    auto hr4 = reduce(h, crop(2, 4));
+    BOOST_TEST_EQ(hr4, reduce(h, slice(1, 3, slice_mode::crop)));
+    BOOST_TEST_EQ(sum(hr4), 2);
+    BOOST_TEST_EQ(hr4.size(), 4); // flow bins are not physically removed, only zeroed
+  }
+
   // mixed axis types
   {
     R r(5, 0.0, 5.0);
